<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <!-- Document metadata for the article page; the page text is Simplified Chinese, hence lang="zh-CN". -->
    <meta charset="UTF-8">
    <!-- maximum-scale is intentionally not set: capping the zoom level blocks pinch-to-zoom. -->
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <meta name="author" content="esy">
    <meta name="description" content="esy">
    <title>spacy的基础学习 | ESY</title>
    <link href="https://esyyes.github.io" rel="prefetch">

    <!-- Theme stylesheets. -->
    <link rel="stylesheet" href="/css/bootstrap.min.css">
    <link rel="stylesheet" href="/css/aos.css">
    <link rel="stylesheet" href="/css/style.css">

    <!-- Scripts keep their original order and stay blocking; markup further down
         the page may depend on them being loaded (confirm before adding defer). -->
    <script src="/js/jquery.min.js"></script>
    <script src="/js/bootstrap.min.js"></script>
    <script src="/js/aos.js"></script>
    <script src="/js/highslide/highslide-full.min.js"></script>
    <link rel="stylesheet" href="/js/highslide/highslide.css">

    <style>
        /* Tiled page background used at widths up to 768px. */
        @media (max-width: 768px) {
            body {
                /* Colour, image and attachment are set in one shorthand. Previously a
                   separate background-color came first and was reset by the
                   `background` shorthand that followed, so the fallback colour
                   never applied. */
                background: #f0f0f0 url('/imgs/xsbg.gif') fixed;
            }
        }
    </style>
    <meta name="generator" content="Hexo 5.2.0">
</head>
<body>
    <!-- Background slideshow: one list item per slide, each carrying a caption. -->
    <section class="hidden-xs">
        <ul class="cb-slideshow">
            <li><span>天若</span></li>
            <li><span>有情</span></li>
            <li><span>天亦老</span></li>
            <li><span>我为</span></li>
            <li><span>长者</span></li>
            <li><span>续一秒</span></li>
        </ul>
    </section>
    <!-- "Onii-chan" circular menu (try it and you will see): six entries laid out
         on a ring, plus an audio clip. -->
    <div class="gal-menu gal-dropdown">
        <div class="circle" id="gal">
            <div class="ring">
                <a class="menuItem" href="https://esyyes.github.io" style="left: 50%; top: 15%;">首页</a>
                <!-- "Next page" entry: rendered without an href on this page. -->
                <a class="menuItem" style="left: 80.3109%; top: 32.5%;">下一页</a>
                <a class="menuItem" href="/archives" style="left: 80.3109%; top: 67.5%;">归档</a>
                <a class="menuItem" href="/about" style="left: 50%; top: 85%;">关于</a>
                <a class="menuItem" href="/message" style="left: 19.6891%; top: 67.5%;">留言板</a>
                <!-- "Previous page" entry: rendered without an href on this page. -->
                <a class="menuItem" style="left: 19.6891%; top: 32.5%;">上一页</a>
            </div>
            <audio id="audio" src="/imgs/oni.mp3"></audio>
        </div>
    </div>
    
    <header class="navbar navbar-inverse" id="gal-header">
    <div class="container">
        <div class="navbar-header">
            <!-- Collapse toggle. The button shows only an icon, so aria-label
                 provides its accessible name and the icon itself is hidden from
                 assistive technology. -->
            <button type="button" class="navbar-toggle collapsed"
                    data-toggle="collapse" data-target=".bs-navbar-collapse"
                    aria-expanded="false" aria-label="切换导航菜单">
                <span class="fa fa-lg fa-reorder" aria-hidden="true"></span>
            </button>
            <!-- Brand styling. Kept next to the brand markup, but placed outside the
                 link because a style element is not valid inside an anchor. -->
            <style>
                #gal-header .navbar-brand {
                    height: 54px;
                    line-height: 24px;
                    font-size: 28px;
                    opacity: 1;
                    background-color: rgba(0,0,0,0);
                    text-shadow: 0 0 5px #fff,0 0 10px #fff,0 0 15px #fff,0 0 20px #228DFF,0 0 35px #228DFF,0 0 40px #228DFF,0 0 50px #228DFF,0 0 75px #228DFF;
                }
            </style>
            <a href="https://esyyes.github.io">
                <!-- Text brand is used here (navbar_text or config.title). -->
                <div class="navbar-brand">ESY</div>
            </a>
        </div>
        <!-- Collapsible primary menu. Icons are decorative (the text next to them
             names each entry), so they are hidden from assistive technology. -->
        <div class="collapse navbar-collapse bs-navbar-collapse">
            <ul class="nav navbar-nav" id="menu-gal">
                <li>
                    <a href="/">
                        <i class="fa fa-home" aria-hidden="true"></i>首页
                    </a>
                </li>
                <li>
                    <a href="/archives">
                        <i class="fa fa-archive" aria-hidden="true"></i>归档
                    </a>
                </li>
                <!-- Categories dropdown. -->
                <li class="dropdown">
                    <!-- TODO: add hover-to-open dropdown behaviour -->
                    <!-- role="button": this anchor toggles the menu instead of navigating. -->
                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button"
                       aria-haspopup="true" aria-expanded="false" data-hover="dropdown">
                        <i class="fa fa-list" aria-hidden="true"></i>分类
                    </a>
                    <ul class="dropdown-menu">
                        <li>
                            <a href="/categories/py-study/">py_study</a>
                        </li>
                        <li>
                            <a href="/categories/nlp/">nlp</a>
                        </li>
                        <li>
                            <a href="/categories/Graduation-work/">Graduation work</a>
                        </li>
                        <li>
                            <a href="/categories/work/">work</a>
                        </li>
                        <li>
                            <a href="/categories/hexo/">hexo</a>
                        </li>
                        <li>
                            <a href="/categories/hexo%E5%AE%8C%E5%96%84/">-hexo完善</a>
                        </li>
                        <li>
                            <!-- The visible text is only an ellipsis; aria-label names the destination. -->
                            <a href="/categories" aria-label="全部分类">...</a>
                        </li>
                    </ul>
                </li>
                <!-- Tags dropdown. -->
                <li class="dropdown">
                    <!-- TODO: add hover-to-open dropdown behaviour -->
                    <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button"
                       aria-haspopup="true" aria-expanded="false" data-hover="dropdown">
                        <i class="fa fa-tags" aria-hidden="true"></i>标签
                    </a>
                    <ul class="dropdown-menu">
                        <li>
                            <a href="/tags/py-study/">py_study</a>
                        </li>
                        <li>
                            <a href="/tags/nlp/">nlp</a>
                        </li>
                        <li>
                            <a href="/tags/Graduation-work/">Graduation work</a>
                        </li>
                        <li>
                            <a href="/tags/work/">work</a>
                        </li>
                        <li>
                            <a href="/tags/hexo/">hexo</a>
                        </li>
                        <li>
                            <a href="/tags/%E4%B8%AA%E4%BA%BA%E5%8D%9A%E5%AE%A2%E6%90%AD%E5%BB%BA/">-个人博客搭建</a>
                        </li>
                        <li>
                            <!-- The visible text is only an ellipsis; aria-label names the destination. -->
                            <a href="/tags" aria-label="全部标签">...</a>
                        </li>
                    </ul>
                </li>
                <li>
                    <a href="/about">
                        <i class="fa fa-user" aria-hidden="true"></i>关于我
                    </a>
                </li>
            </ul>
        </div>
    </div>
</header>
    <div id="gal-body">
        <div class="container">
            <div class="row">
                <div class="col-md-8 gal-right" id="mainstay">
                    
<article class="article well article-body" id="article">
    <!-- Breadcrumb trail: site home, separator, current article title. -->
    <div class="breadcrumb">
        <i class="fa fa-home"></i>
        <a href="https://esyyes.github.io">ESY</a>
        &gt;
        <span>spacy的基础学习</span>
    </div>
    <!-- Article header for larger screens: title, tag label and publication date. -->
    <div class="hidden-xs">
        <div class="title-article">
            <h1>
                <a href="/2020/07/15/python%20work/spacy%E7%9A%84%E5%9F%BA%E7%A1%80%E5%AD%A6%E4%B9%A0/">spacy的基础学习</a>
            </h1>
        </div>
        <div class="tag-article">
            <!-- Tag label. -->
            <span class="label label-gal">
                <i class="fa fa-tags"></i>
                <a href="/tags/work/">work</a>
            </span>
            <!-- Date label. -->
            <span class="label label-gal">
                <i class="fa fa-calendar"></i> 2020-07-15
            </span>
        </div>
    </div>
    <!-- Article header for small screens: centred title, date and tag. -->
    <div class="visible-xs">
        <!-- Bootstrap's text-center class replaces the deprecated <center> element. -->
        <div class="text-center">
            <div class="title-article">
                <h4>
                    <a href="/2020/07/15/python%20work/spacy%E7%9A%84%E5%9F%BA%E7%A1%80%E5%AD%A6%E4%B9%A0/">spacy的基础学习</a>
                </h4>
            </div>
            <p>
                <i class="fa fa-calendar"></i> 2020-07-15
            </p>
            <p>
                <i class="fa fa-tags"></i>
                <a href="/tags/work/">work</a>
            </p>
        </div>
    </div>
    <div class="content-article">
        <h1 id="spacy"><a href="#spacy" class="headerlink" title="spacy"></a>spacy</h1><h2 id="spacy的定义"><a href="#spacy的定义" class="headerlink" title="spacy的定义"></a>spacy的定义</h2><p> spaCy是一个Python开源模块，用于<strong>处理自然语言的大量文本</strong>。比如，一段文字的关键是什么？在段落中上下文的意思是什么？谁对谁做了什么？哪些公司和产品名称特指什么？一个单词和其他单词相似程度如何？作为人类，理解自然语言比较容易，但如果让计算机去理解文本的意思，这都是不可回避的问题。 </p>
<p> spaCy就是帮助你使用计算机程序去处理和<strong>理解</strong>海量文本的工具。目前，它号称是<strong>速度最快、更加适合实际应用的工业级产品</strong>。spaCy可以帮助你构建<strong>信息提取、自然语言理解</strong>系统，或为<strong>深度学习做文本预处理</strong>。</p>
<p>spaCy是世界上最快的工业级自然语言处理工具。 支持多种自然语言处理基本功能。</p>
<p>官网地址：<a target="_blank" rel="noopener" href="https://spacy.io/">https://spacy.io/</a></p>
<p>spaCy主要功能包括分词、词性标注、词干化、命名实体识别、名词短语提取等等。</p>
<h3 id="安装：pip-install-spacy"><a href="#安装：pip-install-spacy" class="headerlink" title="安装：pip install spacy"></a>安装：pip install spacy</h3><p> 与平台不同，spaCy不提供软件即服务或Web应用程序。它是一个开放源代码库，旨在帮助您构建NLP应用程序，而不是消耗性服务。 </p>
<p> 这是一个开源库。 </p>
<p>在文档中，您会看到对spaCy各项功能的介绍。其中一些涉及语言学概念，而其他则涉及更通用的机器学习功能。</p>
<table>
<thead>
<tr>
<th>名称</th>
<th>描述</th>
</tr>
</thead>
<tbody><tr>
<td><strong>分词</strong></td>
<td>将文本分割成单词，标点符号等</td>
</tr>
<tr>
<td><strong>词性</strong>（POS）<strong>标记</strong></td>
<td>将单词类型分配给标记，例如动词或名词。</td>
</tr>
<tr>
<td><strong>依赖解析</strong></td>
<td>分配句法依存标签，描述各个标记之间的关系（例如主语或宾语）。</td>
</tr>
<tr>
<td><strong>词形还原</strong></td>
<td>分配单词的基本形式。例如，“was”的词元是“be”，“rats”的词元是“rat”。</td>
</tr>
<tr>
<td><strong>句子边界检测</strong>（SBD）</td>
<td>查找和分割单个句子。</td>
</tr>
<tr>
<td><strong>命名实体识别</strong>（NER）</td>
<td>标注具名的“真实世界”对象，例如人员、公司或地点。</td>
</tr>
<tr>
<td><strong>实体链接</strong>（EL）</td>
<td>消除文本实体与知识库中唯一标识符的歧义。</td>
</tr>
<tr>
<td><strong>相似度</strong></td>
<td>比较单词，文本跨度和文档，以及它们之间的相似程度。</td>
</tr>
<tr>
<td><strong>文本分类</strong></td>
<td>为整个文档或文档的一部分分配类别或标签。</td>
</tr>
<tr>
<td><strong>基于规则的匹配</strong></td>
<td>根据标记的文本和语言注释来查找标记序列，类似于正则表达式。</td>
</tr>
<tr>
<td><strong>训练</strong></td>
<td>更新和改进统计模型的预测。</td>
</tr>
<tr>
<td><strong>序列化</strong></td>
<td>将对象保存到文件或字节字符串</td>
</tr>
</tbody></table>
<table>
<thead>
<tr>
<th>NAME</th>
<th>DESCRIPTION</th>
</tr>
</thead>
<tbody><tr>
<td><strong>Tokenization</strong></td>
<td>Segmenting text into words, punctuation marks etc.</td>
</tr>
<tr>
<td><strong>Part-of-speech</strong> (POS) <strong>Tagging</strong></td>
<td>Assigning word types to tokens, like verb or noun.</td>
</tr>
<tr>
<td><strong>Dependency Parsing</strong></td>
<td>Assigning syntactic dependency labels, describing the relations between individual tokens, like subject or object.</td>
</tr>
<tr>
<td><strong>Lemmatization</strong></td>
<td>Assigning the base forms of words. For example, the lemma of “was” is “be”, and the lemma of “rats” is “rat”.</td>
</tr>
<tr>
<td><strong>Sentence Boundary Detection</strong> (SBD)</td>
<td>Finding and segmenting individual sentences.</td>
</tr>
<tr>
<td><strong>Named Entity Recognition</strong> (NER)</td>
<td>Labelling named “real-world” objects, like persons, companies or locations.</td>
</tr>
<tr>
<td><strong>Entity Linking</strong> (EL)</td>
<td>Disambiguating textual entities to unique identifiers in a Knowledge Base.</td>
</tr>
<tr>
<td><strong>Similarity</strong></td>
<td>Comparing words, text spans and documents and how similar they are to each other.</td>
</tr>
<tr>
<td><strong>Text Classification</strong></td>
<td>Assigning categories or labels to a whole document, or parts of a document.</td>
</tr>
<tr>
<td><strong>Rule-based Matching</strong></td>
<td>Finding sequences of tokens based on their texts and linguistic annotations, similar to regular expressions.</td>
</tr>
<tr>
<td><strong>Training</strong></td>
<td>Updating and improving a statistical model’s predictions.</td>
</tr>
<tr>
<td><strong>Serialization</strong></td>
<td>Saving objects to files or byte strings.</td>
</tr>
</tbody></table>
<p>尽管spaCy的某些功能可以独立运行，但是其他功能需要 加载<a target="_blank" rel="noopener" href="https://spacy.io/models">统计模型</a>，这使spaCy可以<strong>预测</strong> 语言注释-例如，单词是动词还是名词。spaCy当前提供用于多种语言的统计模型，可以将其安装为单独的Python模块。模型的大小，速度，内存使用量，准确性和所包含的数据可能会有所不同。您选择的模型始终取决于您的用例和您使用的文本。对于通用用例，小型的默认模型始终是一个好的开始。它们通常包括以下组件：</p>
<ul>
<li>词性标记器，依赖性分析器和命名实体识别器的<strong>二进制权重</strong>，以在上下文中预测这些注释。</li>
<li><strong>词汇</strong>中的词汇<strong>条目</strong>，即单词及其与上下文无关的属性，例如形状或拼写。</li>
<li><strong>数据文件，</strong>例如lemmatization规则和查找表。</li>
<li><strong>单词向量</strong>，即<strong>单词</strong>的多维含义表示，可让您确定它们之间的相似程度。</li>
<li><strong>配置</strong>选项（例如语言和处理管道设置）可在您加载模型时将spaCy置于正确的状态。</li>
</ul>
<p> 安装默认模型，获取代码以从spaCy内加载它，并提供示例进行测试。有关更多选项，请参见下面有关可用模型的部分。 </p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">import spacy</span><br><span class="line">nlp &#x3D; spacy.load(&quot;en_core_web_sm&quot;)</span><br><span class="line">两种形式</span><br><span class="line">import en_core_web_sm</span><br><span class="line">nlp &#x3D; en_core_web_sm.load()</span><br></pre></td></tr></table></figure>

<p>Download best-matching version of specific model for your spaCy installation</p>
<p>为已安装的spaCy下载特定模型的最佳匹配版本</p>
<p> python -m spacy download en_core_web_sm </p>
<p>Out-of-the-box: download best-matching default model and create shortcut link </p>
<p>开箱即用：下载最匹配的默认模型并创建快捷链接</p>
<p>python -m spacy download en </p>
<p>Download exact model version (doesn’t create shortcut link) python -m spacy download en_core_web_sm-2.2.0 --direct </p>
<p>下载精确的模型版本（不创建快捷链接）python -m spacy download en_core_web_sm-2.2.0 --direct</p>
<ul>
<li><p><strong>大小</strong>：模型大小标识（<code>sm</code>，<code>md</code>或<code>lg</code>）</p>
<p>已经有了 en_core_web_md-2.2.5.tar.gz 这个版本，下载对应的 spaCy 2.2.4 版本。</p>
<p>已经能够正常运行，参考网页</p>
<p> <a target="_blank" rel="noopener" href="https://spacy.io/">https://spacy.io/</a> </p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># -*- coding: utf-8 -*-</span></span><br><span class="line"><span class="comment"># @Time     : 2020/7/15</span></span><br><span class="line"><span class="comment"># @Author   : esy</span></span><br><span class="line"></span><br><span class="line"><span class="keyword">import</span> en_core_web_md</span><br><span class="line">nlp = en_core_web_md.load()</span><br><span class="line">text = (<span class="string">&quot;When Sebastian Thrun started working on self-driving cars at &quot;</span></span><br><span class="line">        <span class="string">&quot;Google in 2007, few people outside of the company took him &quot;</span></span><br><span class="line">        <span class="string">&quot;seriously. 
“I can tell you very senior CEOs of major American &quot;</span></span><br><span class="line">        <span class="string">&quot;car companies would shake my hand and turn away because I wasn’t &quot;</span></span><br><span class="line">        <span class="string">&quot;worth talking to,” said Thrun, in an interview with Recode earlier &quot;</span></span><br><span class="line">        <span class="string">&quot;this week.&quot;</span>)</span><br><span class="line">doc = nlp(text)</span><br><span class="line"></span><br><span class="line"><span class="comment"># Analyze syntax</span></span><br><span class="line">print(<span class="string">&quot;Noun phrases:&quot;</span>, [chunk.text <span class="keyword">for</span> chunk <span class="keyword">in</span> doc.noun_chunks])</span><br><span class="line">print(<span class="string">&quot;Verbs:&quot;</span>, [token.lemma_ <span class="keyword">for</span> token <span class="keyword">in</span> doc <span class="keyword">if</span> token.pos_ == <span class="string">&quot;VERB&quot;</span>])</span><br><span class="line"></span><br><span class="line"><span class="comment"># Find named entities, phrases and concepts</span></span><br><span class="line"><span class="keyword">for</span> entity <span class="keyword">in</span> doc.ents:</span><br><span class="line">    print(entity.text, entity.label_)</span><br><span class="line">运行结果</span><br><span class="line">Noun phrases: [<span class="string">&#x27;Sebastian Thrun&#x27;</span>, <span class="string">&#x27;self-driving cars&#x27;</span>, <span class="string">&#x27;Google&#x27;</span>, <span class="string">&#x27;few people&#x27;</span>, <span class="string">&#x27;the company&#x27;</span>, <span class="string">&#x27;him&#x27;</span>, <span class="string">&#x27;I&#x27;</span>, <span class="string">&#x27;you&#x27;</span>, <span class="string">&#x27;very senior CEOs&#x27;</span>, <span class="string">&#x27;major American car companies&#x27;</span>, <span class="string">&#x27;my 
hand&#x27;</span>, <span class="string">&#x27;I&#x27;</span>, <span class="string">&#x27;Thrun&#x27;</span>, <span class="string">&#x27;an interview&#x27;</span>, <span class="string">&#x27;Recode&#x27;</span>]</span><br><span class="line">Verbs: [<span class="string">&#x27;start&#x27;</span>, <span class="string">&#x27;work&#x27;</span>, <span class="string">&#x27;drive&#x27;</span>, <span class="string">&#x27;take&#x27;</span>, <span class="string">&#x27;can&#x27;</span>, <span class="string">&#x27;tell&#x27;</span>, <span class="string">&#x27;would&#x27;</span>, <span class="string">&#x27;shake&#x27;</span>, <span class="string">&#x27;turn&#x27;</span>, <span class="string">&#x27;talk&#x27;</span>, <span class="string">&#x27;say&#x27;</span>]</span><br><span class="line">Sebastian Thrun PERSON</span><br><span class="line">Google ORG</span><br><span class="line"><span class="number">2007</span> DATE</span><br><span class="line">American NORP</span><br><span class="line">Thrun PERSON</span><br><span class="line">Recode ORG</span><br><span class="line">earlier this week DATE</span><br></pre></td></tr></table></figure>

<h2 id="spacy的基础分析"><a href="#spacy的基础分析" class="headerlink" title="spacy的基础分析"></a>spacy的基础分析</h2></li>
</ul>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">import</span> en_core_web_md</span><br><span class="line">nlp = en_core_web_md.load()</span><br><span class="line">doc = nlp(<span class="string">&quot;Apple is looking at buying U.K. startup for $1 billion&quot;</span>)</span><br><span class="line"></span><br><span class="line"><span class="keyword">for</span> token <span class="keyword">in</span> doc:</span><br><span class="line">    print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,</span><br><span class="line">            token.shape_, token.is_alpha, token.is_stop)</span><br></pre></td></tr></table></figure>

<table>
<thead>
<tr>
<th>TEXT</th>
<th>LEMMA</th>
<th>POS</th>
<th>TAG</th>
<th>DEP</th>
<th>SHAPE</th>
<th>ALPHA</th>
<th>STOP</th>
</tr>
</thead>
<tbody><tr>
<td>Apple</td>
<td>apple</td>
<td><code>PROPN</code></td>
<td><code>NNP</code></td>
<td><code>nsubj</code></td>
<td><code>Xxxxx</code></td>
<td><code>True</code></td>
<td><code>False</code></td>
</tr>
<tr>
<td>is</td>
<td>be</td>
<td><code>AUX</code></td>
<td><code>VBZ</code></td>
<td><code>aux</code></td>
<td><code>xx</code></td>
<td><code>True</code></td>
<td><code>True</code></td>
</tr>
<tr>
<td>looking</td>
<td>look</td>
<td><code>VERB</code></td>
<td><code>VBG</code></td>
<td><code>ROOT</code></td>
<td><code>xxxx</code></td>
<td><code>True</code></td>
<td><code>False</code></td>
</tr>
<tr>
<td>at</td>
<td>at</td>
<td><code>ADP</code></td>
<td><code>IN</code></td>
<td><code>prep</code></td>
<td><code>xx</code></td>
<td><code>True</code></td>
<td><code>True</code></td>
</tr>
<tr>
<td>buying</td>
<td>buy</td>
<td><code>VERB</code></td>
<td><code>VBG</code></td>
<td><code>pcomp</code></td>
<td><code>xxxx</code></td>
<td><code>True</code></td>
<td><code>False</code></td>
</tr>
<tr>
<td>U.K.</td>
<td>u.k.</td>
<td><code>PROPN</code></td>
<td><code>NNP</code></td>
<td><code>compound</code></td>
<td><code>X.X.</code></td>
<td><code>False</code></td>
<td><code>False</code></td>
</tr>
<tr>
<td>startup</td>
<td>startup</td>
<td><code>NOUN</code></td>
<td><code>NN</code></td>
<td><code>dobj</code></td>
<td><code>xxxx</code></td>
<td><code>True</code></td>
<td><code>False</code></td>
</tr>
<tr>
<td>for</td>
<td>for</td>
<td><code>ADP</code></td>
<td><code>IN</code></td>
<td><code>prep</code></td>
<td><code>xxx</code></td>
<td><code>True</code></td>
<td><code>True</code></td>
</tr>
<tr>
<td>$</td>
<td>$</td>
<td><code>SYM</code></td>
<td><code>$</code></td>
<td><code>quantmod</code></td>
<td><code>$</code></td>
<td><code>False</code></td>
<td><code>False</code></td>
</tr>
<tr>
<td>1</td>
<td>1</td>
<td><code>NUM</code></td>
<td><code>CD</code></td>
<td><code>compound</code></td>
<td><code>d</code></td>
<td><code>False</code></td>
<td><code>False</code></td>
</tr>
<tr>
<td>billion</td>
<td>billion</td>
<td><code>NUM</code></td>
<td><code>CD</code></td>
<td><code>pobj</code></td>
<td><code>xxxx</code></td>
<td><code>True</code></td>
<td><code>False</code></td>
</tr>
</tbody></table>
<ul>
<li><strong>Text:</strong> The original word text.<strong>文字：</strong>原始文字。</li>
<li><strong>Lemma:</strong> The base form of the word.<strong>词元：</strong>该词的基本形式。</li>
<li><strong>POS:</strong> The simple <a target="_blank" rel="noopener" href="https://universaldependencies.org/docs/u/pos/">UPOS</a> part-of-speech tag.<strong>POS：</strong>简单的<a target="_blank" rel="noopener" href="https://universaldependencies.org/docs/u/pos/">UPOS</a>词性标签。</li>
<li><strong>Tag:</strong> The detailed part-of-speech tag.<strong>标记：</strong>详细的词性标记。</li>
<li><strong>Dep:</strong> Syntactic dependency, i.e. the relation between tokens.<strong>Dep：</strong>语法依赖性，即标记之间的关系。</li>
<li><strong>Shape:</strong> The word shape – capitalization, punctuation, digits.<strong>形状：</strong>单词形状-大写，标点，数字。</li>
<li><strong>is alpha:</strong> Is the token an alpha character?<strong>是否为字母：</strong>标记是否由字母字符组成？</li>
<li><strong>is stop:</strong> Is the token part of a stop list, i.e. the most common words of the language?<strong>是否为停用词：</strong>标记是否属于停用词表，即语言中最常见的词？</li>
</ul>
<p>POS：词性标签</p>
<p>词性列表： <a target="_blank" rel="noopener" href="https://spacy.io/api/annotation#pos-tagging">https://spacy.io/api/annotation#pos-tagging</a> </p>
<table>
<thead>
<tr>
<th>POS</th>
<th>DESCRIPTION</th>
<th>EXAMPLES</th>
</tr>
</thead>
<tbody><tr>
<td><code>ADJ</code></td>
<td>adjective</td>
<td>big, old, green, incomprehensible, first</td>
</tr>
<tr>
<td><code>ADP</code></td>
<td>adposition</td>
<td>in, to, during</td>
</tr>
<tr>
<td><code>ADV</code></td>
<td>adverb</td>
<td>very, tomorrow, down, where, there</td>
</tr>
<tr>
<td><code>AUX</code></td>
<td>auxiliary</td>
<td>is, has (done), will (do), should (do)</td>
</tr>
<tr>
<td><code>CONJ</code></td>
<td>conjunction</td>
<td>and, or, but</td>
</tr>
<tr>
<td><code>CCONJ</code></td>
<td>coordinating conjunction</td>
<td>and, or, but</td>
</tr>
<tr>
<td><code>DET</code></td>
<td>determiner</td>
<td>a, an, the</td>
</tr>
<tr>
<td><code>INTJ</code></td>
<td>interjection</td>
<td>psst, ouch, bravo, hello</td>
</tr>
<tr>
<td><code>NOUN</code></td>
<td>noun</td>
<td>girl, cat, tree, air, beauty</td>
</tr>
<tr>
<td><code>NUM</code></td>
<td>numeral</td>
<td>1, 2017, one, seventy-seven, IV, MMXIV</td>
</tr>
<tr>
<td><code>PART</code></td>
<td>particle</td>
<td>’s, not,</td>
</tr>
<tr>
<td><code>PRON</code></td>
<td>pronoun</td>
<td>I, you, he, she, myself, themselves, somebody</td>
</tr>
<tr>
<td><code>PROPN</code></td>
<td>proper noun</td>
<td>Mary, John, London, NATO, HBO</td>
</tr>
<tr>
<td><code>PUNCT</code></td>
<td>punctuation</td>
<td>., (, ), ?</td>
</tr>
<tr>
<td><code>SCONJ</code></td>
<td>subordinating conjunction</td>
<td>if, while, that</td>
</tr>
<tr>
<td><code>SYM</code></td>
<td>symbol</td>
<td>$, %, §, ©, +, −, ×, ÷, =, :), 😝</td>
</tr>
<tr>
<td><code>VERB</code></td>
<td>verb</td>
<td>run, runs, running, eat, ate, eating</td>
</tr>
<tr>
<td><code>X</code></td>
<td>other</td>
<td>sfpksdpsxmsa</td>
</tr>
<tr>
<td><code>SPACE</code></td>
<td>space</td>
<td></td>
</tr>
</tbody></table>
<p>CC     coordinating conjunction 并列连词</p>
<p>CD     cardinal digit  纯数  基数</p>
<p>DT     determiner  限定词（置于名词前起限定作用，如 the、some、my 等）</p>
<p>EX     existential there (like: “there is” … think of it like “there exists”)   存在句；存现句</p>
<p>FW     foreign word  外来语；外来词；外文原词</p>
<p>IN     preposition/subordinating conjunction 介词/从属连词；主从连词；从属连接词</p>
<p>JJ     adjective    ‘big’  形容词</p>
<p>JJR    adjective, comparative ‘bigger’ （形容词或副词的）比较级形式</p>
<p>JJS    adjective, superlative ‘biggest’  （形容词或副词的）最高级</p>
<p>LS     list marker  1)</p>
<p>MD     modal (could, will) 情态动词</p>
<p>NN     noun, singular ‘desk’ 名词单数形式</p>
<p>NNS    noun, plural  ‘desks’  名词复数形式</p>
<p>NNP    proper noun, singular     ‘Harrison’ 专有名词</p>
<p>NNPS  proper noun, plural ‘Americans’  专有名词复数形式</p>
<p>PDT    predeterminer      ‘all the kids’  前位限定词</p>
<p>POS    possessive ending  parent’s   所有格结尾</p>
<p>PRP    personal pronoun   I, he, she  人称代词</p>
<p>PRP$  possessive pronoun my, his, hers  物主代词</p>
<p>RB     adverb very, silently, 副词    非常  静静地</p>
<p>RBR    adverb, comparative better   （形容词或副词的）比较级形式</p>
<p>RBS    adverb, superlative best    （形容词或副词的）最高级</p>
<p>RP     particle     give up 小品词(与动词构成短语动词的副词或介词)</p>
<p>TO     to    go ‘to’ the store.</p>
<p>UH     interjection errrrrrrrm  感叹词；感叹语</p>
<p>VB     verb, base form    take   动词原形</p>
<p>VBD    verb, past tense   took   动词   过去时；过去式</p>
<p>VBG    verb, gerund/present participle taking 动词  动名词/现在分词</p>
<p>VBN    verb, past participle     taken 动词  过去分词</p>
<p>VBP    verb, sing. present, non-3rd     take 动词  非第三人称单数现在时</p>
<p>VBZ    verb, 3rd person sing. present  takes   动词  第三人称单数现在时</p>
<p>WDT    wh-determiner      which wh-限定词（疑问/关系限定词）</p>
<p>WP     wh-pronoun   who, what 代词（代替名词或名词词组的单词）</p>
<p>WP$    possessive wh-pronoun     whose  所有格；属有词</p>
<p>WRB    wh-adverb    where, when 副词</p>
<p>原文链接：<a target="_blank" rel="noopener" href="https://blog.csdn.net/jasonjarvan/article/details/79955664">https://blog.csdn.net/jasonjarvan/article/details/79955664</a></p>

    </div>
</article>


                </div>
                <aside class="col-md-4 gal-left" id="sidebar">
    <!-- Sidebar search box (this is not the search results page). The form sends
         the query as the "s" parameter to /search/index.html via GET. -->
    <aside id="sidebar-search">
        <div class="search hidden-xs" data-aos="fade-up" data-aos-duration="2000">
            <form class="form-inline clearfix" id="search-form" method="get"
                  action="/search/index.html">
                <!-- A placeholder is not a label, so aria-label names the field. -->
                <input type="text" name="s" class="form-control" id="searchInput"
                       placeholder="搜索文章~" autocomplete="off" aria-label="搜索文章">
                <!-- Icon-only submit button: aria-label provides its accessible name. -->
                <button class="btn btn-danger btn-gal" type="submit" aria-label="搜索">
                    <i class="fa fa-search" aria-hidden="true"></i>
                </button>
            </form>
        </div>
    </aside>
    <!-- Author card: quoted name heading, avatar and description. -->
    <aside id="sidebar-author">
        <div class="panel panel-gal" data-aos="flip-right" data-aos-duration="3000">
            <div class="panel-heading" style="text-align: center">
                <i class="fa fa-quote-left"></i>
                esy
                <i class="fa fa-quote-right"></i>
            </div>
            <div class="author-panel text-center">
                <img src="/imgs/avatar.jpg" width="140" height="140"
                     alt="个人头像" class="author-image">
                <!-- The wrapper is a div because the description is itself a paragraph.
                     A <p> cannot contain another <p>; the parser would split it into
                     an empty styled paragraph, the text, and a stray empty paragraph. -->
                <div class="author-description"><p>esy</p></div>
            </div>
        </div>
    </aside>
    
    <!-- "Latest comments" panel; the list is empty in the static markup. -->
    <aside id="sidebar-recent_comments">
        <div class="panel panel-gal recent hidden-xs" data-aos="fade-up" data-aos-duration="2000">
            <div class="panel-heading">
                <i class="fa fa-comments"></i>
                最新评论
                <i class="fa fa-times-circle panel-remove"></i>
                <i class="fa fa-chevron-circle-up panel-toggle"></i>
            </div>
            <ul class="list-group list-group-flush"></ul>
        </div>
    </aside>
    
    <!-- 要配置好leancloud才能开启此小工具 -->
    
    
    <!-- "Recent posts" panel: ten article links, in the order they were generated. -->
    <aside id="sidebar-recent_posts">
        <div class="panel panel-gal recent hidden-xs" data-aos="fade-up" data-aos-duration="2000">
            <div class="panel-heading">
                <i class="fa fa-refresh"></i>
                近期文章
                <i class="fa fa-times-circle panel-remove"></i>
                <i class="fa fa-chevron-circle-up panel-toggle"></i>
            </div>
            <ul class="list-group list-group-flush">
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/11/05/python%20work/%E6%9C%80%E5%B0%8F%E4%BA%8C%E4%B9%98%E6%B3%95/">最小二乘法</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/11/05/python%20work/%E7%BB%9F%E8%AE%A1%E5%AD%A6%E4%B9%A0-%E7%AC%AC%E4%B8%80%E7%AB%A0/">统计学习--第一章</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/11/04/python%20work/hello-world/">Hello World</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/11/03/python%20work/%E5%88%86%E7%B1%BB%E6%A8%A1%E5%9E%8B%E7%9A%84%E8%AF%84%E4%BC%B0%E6%8C%87%E6%A0%87/">分类模型的评估指标</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/10/21/python%20work/10-21-%E7%88%AC%E8%99%AB%E5%9F%BA%E7%A1%80/">10-21 爬虫基础</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/09/25/python%20work/CRF%E7%9A%84%E6%95%B4%E4%BD%93%E6%B5%81%E7%A8%8B%E7%BB%93%E6%9E%9C/">CRF的整体流程结果</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/09/25/python%20work/nlp-crf%E6%A8%A1%E5%9E%8B/">nlp_crf模型</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/09/25/python%20work/%E6%95%B0%E5%AD%A6%E5%BB%BA%E6%A8%A1%E9%97%AE%E9%A2%983/">数学建模问题3</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/09/25/python%20work/%E6%95%B0%E5%AD%A6%E5%BB%BA%E6%A8%A1%E9%97%AE%E9%A2%982/">数学建模问题2</a>
                    </span>
                </li>
                <li class="list-group-item">
                    <span class="post-title">
                        <a href="/2020/09/25/python%20work/%E6%95%B0%E5%AD%A6%E5%BB%BA%E6%A8%A1%E9%97%AE%E9%A2%981/">数学建模问题1</a>
                    </span>
                </li>
            </ul>
        </div>
    </aside>
    
    
    <!-- Sidebar panel: random posts (one list item per post link). -->
    <aside id="sidebar-rand_posts">
    <div class="panel panel-gal recent hidden-xs" data-aos="fade-up" data-aos-duration="2000">
        <div class="panel-heading">
            <!-- Decorative heading icon: hidden from assistive technology. -->
            <i class="fa fa-refresh" aria-hidden="true"></i>
            随机文章
            <!-- Panel controls; their class names are kept untouched because scripts may bind to them. -->
            <i class="fa fa-times-circle panel-remove"></i>
            <i class="fa fa-chevron-circle-up panel-toggle"></i>
        </div>
        <ul class="list-group list-group-flush">
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/09/25/python%20work/nlp-crf%E6%A8%A1%E5%9E%8B/">nlp_crf模型</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/02/08/python%20work/python-%E6%BB%91%E5%8A%A8%E5%88%87%E7%89%87/">python__滑动切片</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/02/08/python%20work/python%E2%80%94%E2%80%94%E6%BB%91%E5%8A%A8%E5%88%87%E7%89%87%E4%B9%8B%E6%95%B0%E7%BB%84/">python——滑动切片之数组</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2019/12/01/python%20work/python%E5%9F%BA%E7%A1%80%E4%B9%8B%E7%BB%98%E5%88%B6%E5%88%97%E8%A1%A8%E6%96%87%E4%BB%B6/">LV-1</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/07/15/python%20work/spacy%E7%9A%84%E5%9F%BA%E7%A1%80%E5%AD%A6%E4%B9%A0/">spacy的基础学习</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/08/19/python%20work/%E5%AF%B9UCDDB%E6%95%B0%E6%8D%AE%E5%BA%93%E8%BF%9B%E8%A1%8C%E7%89%B9%E5%BE%81%E6%8F%90%E5%8F%96/">对UCDDB数据库进行特征提取</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/09/25/python%20work/%E6%95%B0%E5%AD%A6%E5%BB%BA%E6%A8%A1%E9%97%AE%E9%A2%981/">数学建模问题1</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/07/08/python%20work/%E7%8A%B6%E6%80%81%E4%B8%BA3%E6%97%B6%E7%9A%84%E8%AE%BE%E5%AE%9A/">状态为3时的设定</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/08/31/python%20work/%E5%88%86%E7%B1%BB%E6%A8%A1%E5%9E%8B%E7%9A%84%E7%A1%AE%E5%AE%9A/">分类模型的确定</a>
                </span>
            </li>
            <li class="list-group-item">
                <span class="post-title">
                    <a href="/2020/08/20/python%20work/%E9%87%8D%E6%96%B0%E6%9E%84%E5%BB%BAslpdb%E4%B8%AD%E7%9A%84%E7%89%B9%E5%BE%81%E5%92%8C%E6%A0%87%E7%AD%BE/">重新构建slpdb中的特征和标签</a>
                </span>
            </li>
        </ul>
    </div>
</aside>
    
    
    <!-- Sidebar tab set: popular tags, friend links and personal links.
         Each pill's href ends in the fragment id of the pane it toggles. -->
    <aside id="gal-sets">
        <div class="panel panel-gal hidden-xs" data-aos="fade-up" data-aos-duration="2000">
            <ul class="nav nav-pills pills-gal">
                <li>
                    <a href="/2020/07/15/python%20work/spacy%E7%9A%84%E5%9F%BA%E7%A1%80%E5%AD%A6%E4%B9%A0/index.html#sidebar-tags" data-toggle="tab" id="tags-tab">热门标签</a>
                </li>
                <li>
                    <a href="/2020/07/15/python%20work/spacy%E7%9A%84%E5%9F%BA%E7%A1%80%E5%AD%A6%E4%B9%A0/index.html#sidebar-friend-links" data-toggle="tab" id="friend-links-tab">友情链接</a>
                </li>
                <li>
                    <a href="/2020/07/15/python%20work/spacy%E7%9A%84%E5%9F%BA%E7%A1%80%E5%AD%A6%E4%B9%A0/index.html#sidebar-links" data-toggle="tab" id="links-tab">个人链接</a>
                </li>
            </ul>
            <div class="tab-content">
                <!-- Tag cloud pane: inline anchors, so it stays a plain container. -->
                <div class="cloud-tags tab-pane nav bs-sidenav fade" id="sidebar-tags">
                    <a href="/tags/py-study/" style="font-size: 19.065579704007277px;" class="tag-cloud-link">py_study</a>
                    <a href="/tags/nlp/" style="font-size: 19.84194551328715px;" class="tag-cloud-link">nlp</a>
                    <a href="/tags/Graduation-work/" style="font-size: 9.812454049122305px;" class="tag-cloud-link">Graduation work</a>
                    <a href="/tags/work/" style="font-size: 12.447970179340384px;" class="tag-cloud-link">work</a>
                    <a href="/tags/hexo/" style="font-size: 13.98984627148485px;" class="tag-cloud-link">hexo</a>
                    <a href="/tags/%E4%B8%AA%E4%BA%BA%E5%8D%9A%E5%AE%A2%E6%90%AD%E5%BB%BA/" style="font-size: 17.375255933950967px;" class="tag-cloud-link">-个人博客搭建</a>
                    <a href="/tags/malab-%E6%AF%95%E4%B8%9A/" style="font-size: 9.379836559194928px;" class="tag-cloud-link">-malab -毕业</a>
                    <a href="/tags/python-%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD/" style="font-size: 12.16192372321727px;" class="tag-cloud-link">-python -人工智能</a>
                    <a href="/tags/python/" style="font-size: 10.129189599990383px;" class="tag-cloud-link">python</a>
                    <a href="/tags/python/" style="font-size: 11.045171558808203px;" class="tag-cloud-link">-python</a>
                    <a href="/tags/mathematical-modeling/" style="font-size: 9.65983434923012px;" class="tag-cloud-link">mathematical modeling</a>
                    <a href="/tags/statistical-learning/" style="font-size: 15.80807033021279px;" class="tag-cloud-link">statistical learning</a>
                </div>
                <!-- Friend links pane: a <ul> because <li> is only valid inside a list
                     container; classes and id are unchanged. External links opened in
                     a new tab carry rel="noopener noreferrer". -->
                <ul class="friend-links tab-pane nav bs-sidenav fade" id="sidebar-friend-links">
                    <li>
                        <a href="http://kdays.net/days/" target="_blank" rel="noopener noreferrer">KDays Forum</a>
                    </li>
                    <li>
                        <a href="http://www.gal123.com/" target="_blank" rel="noopener noreferrer">绅士导航♂</a>
                    </li>
                    <li>
                        <a href="http://www.moe123.com/" target="_blank" rel="noopener noreferrer">萌导航</a>
                    </li>
                </ul>
                <!-- Personal links pane: same list structure as the friend links pane. -->
                <ul class="links tab-pane nav bs-sidenav fade" id="sidebar-links">
                    <li>
                        <a href="https://github.com/ZEROKISEKI/" target="_blank" rel="noopener noreferrer">Github</a>
                    </li>
                    <li>
                        <a href="https://coding.net/u/SORA1" target="_blank" rel="noopener noreferrer">Coding</a>
                    </li>
                    <li>
                        <a href="https://www.zhihu.com/people/aonosora/activities" target="_blank" rel="noopener noreferrer">知乎</a>
                    </li>
                </ul>
            </div>
        </div>
    </aside>
    
</aside>
            </div>
        </div>
    </div>
    <!-- Site footer: copyright notice plus engine and theme credits.
         Links opened in a new tab carry rel="noopener noreferrer". -->
    <footer id="gal-footer">
    <div class="container">
        Copyright © 2018 esy Powered by <a href="https://hexo.io/" target="_blank" rel="noopener noreferrer">Hexo</a>.&nbsp;Theme by <a href="https://github.com/ZEROKISEKI" target="_blank" rel="noopener noreferrer">AONOSORA</a>
    </div>
</footer>

<!-- Back-to-top control.
     The icon is decorative, so it is hidden from assistive technology; the
     visually hidden text (Bootstrap .sr-only) gives the control a readable name.
     NOTE(review): this is a <div>, so it is not keyboard-focusable; converting
     it to a <button> would need matching CSS changes in the theme. -->
<div id="gal-gotop">
    <i class="fa fa-angle-up" aria-hidden="true"></i>
    <span class="sr-only">回到顶端</span>
</div>
<!-- Page scripts. They are kept inside <body> (before its closing tag) because
     script elements after </body> are a parse error. The order is preserved:
     the inline blocks use globals (hs, AOS, POWERMODE) that are not defined in
     this file. -->
<script src="/js/activate-power-mode.js"></script>

<script>

    // Configure highslide (image viewer).
    hs.graphicsDir = '/js/highslide/graphics/';
    hs.outlineType = "rounded-white";
    hs.dimmingOpacity = 0.8;
    hs.outlineWhileAnimating = true;
    hs.showCredits = false;
    hs.captionEval = "this.thumb.alt";
    hs.numberPosition = "caption";
    hs.align = "center";
    hs.transitions = ["expand", "crossfade"];
    hs.lang.number = '共%2张图, 当前是第%1张';
    hs.addSlideshow({
      interval: 5000,
      repeat: true,
      useControls: true,
      fixedControls: "fit",
      overlayOptions: {
        opacity: 0.75,
        position: "bottom center",
        hideOnMouseOut: true
      }
    });

    // Initialize AOS (scroll animations).
    AOS.init({
      duration: 1000,
      delay: 0,
      easing: 'ease-out-back'
    });

</script>
<script>
    // NOTE(review): both flags are assigned the string 'true', which is truthy.
    // A string 'false' would be truthy as well; confirm how the library reads
    // these flags before changing either value.
    POWERMODE.colorful = 'true';    // make power mode colorful
    POWERMODE.shake = 'true';       // shake is enabled (truthy value)
    // TODO: adjust the event target to suit the page.
    document.body.addEventListener('input', POWERMODE);
</script>
<script>
    // Slideshow settings published on window for scripts loaded below.
    window.slideConfig = {
      prefix: '/imgs/slide/background',
      ext: 'jpg',
      maxCount: '6'
    };
</script>

<script src="/js/hs.js"></script>
<script src="/js/blog.js"></script>

<script src="/js/oni.js"></script>

</body>
</html>